Skip to main content

Suggested hyperparameter tuning

This library works best with hyperparameter tuning. Slightly different parameters can give significantly different results.

This is the suggested approach towards hyperparameter tuning:

  1. First, tune the architecture hyperparameters n_lags, order, fourier, seasonal_features, cyclic_features, time_features.

    import optuna

    def objective(trial):
        """Optuna objective for tuning the TSF architecture hyperparameters.

        Suggests lag count, seasonal order/fourier terms, and feature toggles,
        fits the forecaster, and returns the overall R^2 (to be maximized).
        """
        n_lags = trial.suggest_int("n_lags", 0, 24)
        order = 0
        fourier = 0
        seasonal_features = trial.suggest_categorical("seasonal_features", [True, False])
        cyclic_features = trial.suggest_categorical("cyclic_features", [True, False])
        time_features = trial.suggest_categorical("time_features", [True, False])

        # order/fourier only have an effect when seasonal features are enabled,
        # so only sample them in that case (keeps the search space conditional).
        if seasonal_features:
            order = trial.suggest_int("order", 0, 6)
            fourier = trial.suggest_int("fourier", 0, 6)

        df = pd.read_parquet("data/delivery_data.parquet")

        tsf = TSF(df, "date", "quantity_delivered", "2023-01-01", "2023-12-01", "2024-01-01", "2024-12-01", "D", "M", True, "2018-01-01", "brand", True, [("2023-01-01", "2023-12-01")])

        df, model, total_scores, product_scores = tsf.tsf(n_lags=n_lags, order=order, fourier=fourier, seasonal_features=seasonal_features, cyclic_features=cyclic_features, time_features=time_features)

        return total_scores["r2"]

    # Maximize because the objective returns an R^2 score (higher is better).
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)
    study.best_trial  # inspect the best hyperparameter combination found

    This maximizes the R² score. You can choose to minimize mean absolute error or optimize any other metric instead.

  2. Next, try different models with the optimal architecture and find the best one. Let's say it's a StackingRegressor with base estimator RandomForestRegressor and final estimator HuberRegressor. Tune the hyperparameters of these estimators.

    from sklearn.ensemble import RandomForestRegressor, StackingRegressor
    from sklearn.linear_model import HuberRegressor

    def objective(trial):
        """Optuna objective for tuning the estimator hyperparameters.

        Uses the architecture found in step 1 (fixed positional args below) and
        searches over the RandomForest/Huber stacking ensemble's parameters,
        returning the overall R^2 (to be maximized).
        """
        n_estimators = trial.suggest_int("n_estimators", 50, 500)
        epsilon = trial.suggest_float("epsilon", 1, 2)
        max_iter = trial.suggest_int("max_iter", 50, 500)
        alpha = trial.suggest_float("alpha", 0.000001, 0.0001)

        df = pd.read_parquet("data/delivery_data.parquet")

        tsf = TSF(df, "date", "quantity_delivered", "2023-01-01", "2023-12-01", "2024-01-01", "2024-12-01", "D", "M", True, "2018-01-01", "brand", True, [("2023-01-01", "2023-12-01")])

        rf = RandomForestRegressor(n_estimators=n_estimators, random_state=42)
        hr = HuberRegressor(epsilon=epsilon, max_iter=max_iter, alpha=alpha)
        model = StackingRegressor(estimators=[("rf", rf)], final_estimator=hr, passthrough=False)

        # Positional args are the optimal architecture from step 1:
        # n_lags=8, order=1, fourier=1, seasonal_features=True,
        # cyclic_features=True, time_features=False.
        df, model, total_scores, product_scores = tsf.tsf(model, 8, 1, 1, True, True, False)
        return total_scores["r2"]

    # Maximize because the objective returns an R^2 score (higher is better).
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)
    study.best_trial  # inspect the best estimator hyperparameters found

Note that different countries/regions may have different sets of optimal hyperparameters, and different brands/SKUs within a region may have different optimal hyperparameters.